In [2]:
import pandas as pd
pd.Series?
In [3]:
# A plain list of strings becomes a Series with the default integer index.
animals = [
    'Tiger',
    'Bear',
    'Moose',
]
pd.Series(data=animals)
Out[3]:
In [4]:
# A list of whole numbers is stored in an integer-typed Series.
numbers = list(range(1, 5))
pd.Series(data=numbers)
Out[4]:
In [5]:
# Same idea with a gap: None stands in for a missing animal name.
animals = [
    'Tiger',
    'Bear',
    None,
]
pd.Series(data=animals)
Out[5]:
In [6]:
# With numeric data, pandas converts None to NaN, so the Series is
# stored as floating point rather than integers.
numbers = [
    1,
    2,
    None,
]
pd.Series(data=numbers)
Out[6]:
In [7]:
import numpy as np
In [8]:
# NaN is a float and never compares equal to None, so this evaluates to False;
# use np.isnan() to detect NaN instead of an equality test.
np.nan == None
Out[8]:
In [9]:
np.isnan(np.nan)
Out[9]:
In [10]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
sports = dict([
    ('Archery', 'Bhutan'),
    ('Golf', 'Scotland'),
    ('Sumo', 'Japan'),
    ('Taekwondo', 'South Korea'),
])
s = pd.Series(data=sports)
s
Out[10]:
In [11]:
s.index
Out[11]:
In [12]:
# Values and index labels can be supplied separately; they pair up by position.
s = pd.Series(
    data=['Tiger', 'Bear', 'Moose'],
    index=['India', 'America', 'Canada'],
)
s
Out[12]:
In [13]:
# When an explicit index accompanies a dict, only the listed labels are kept;
# a label that is absent from the dict ('Hockey') yields a missing value.
sports = {
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
}
s = pd.Series(
    data=sports,
    index=['Golf', 'Sumo', 'Hockey'],
)
s
Out[13]:
In [21]:
# Recreate the full sport -> country Series used by the lookup cells that follow.
sports = dict(zip(
    ('Archery', 'Golf', 'Sumo', 'Taekwondo'),
    ('Bhutan', 'Scotland', 'Japan', 'South Korea'),
))
s = pd.Series(data=sports)
s
Out[21]:
In [22]:
s.iloc[3]
Out[22]:
In [23]:
s.loc['Golf']
Out[23]:
In [24]:
# Positional lookup must go through .iloc: treating an integer key passed to
# [] as a position on a label-indexed Series is deprecated in pandas and no
# longer falls back to positions in newer releases.
s.iloc[3]
Out[24]:
In [25]:
s['Golf']
Out[25]:
In [17]:
# Integer index labels that do not start at 0 (99..102), used to show that
# s[0] is a label lookup here rather than a positional one.
sports = dict(enumerate(
    ['Bhutan', 'Scotland', 'Japan', 'South Korea'],
    start=99,
))
s = pd.Series(data=sports)
In [26]:
# s[0]  # raises KeyError: the index labels are 99-102, so there is no label 0
In [27]:
import numpy as np
In [28]:
# Small numeric Series used to compare iterative and vectorized summation.
s = pd.Series(data=[100.00, 120.00, 101.00, 3.00])
s
Out[28]:
In [29]:
# Iterative approach: walk the Series and accumulate one element at a time.
total = 0
for value in s:
    total = total + value
print(total)
In [30]:
total = np.sum(s)
print(total)
In [32]:
s = pd.Series(np.random.randint(0,1000, 1000))
s.head()
Out[32]:
In [33]:
len(s)
Out[33]:
In [41]:
%%timeit
summary = 0
for item in s:
summary+=item
In [42]:
%%timeit
summary = np.sum(s)
In [43]:
s+=2 #adds two to each item in s using broadcasting
s.head()
Out[43]:
In [44]:
# Element-by-element update, shown for contrast with the broadcast `s += 2`.
# Series.iteritems() and Series.set_value() have been removed from pandas;
# items() yields the same (label, value) pairs and .at is the scalar setter.
for label, value in s.items():
    s.at[label] = value + 2
s.head()
Out[44]:
In [45]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
for label, value in s.iteritems():
s.loc[label]= value+2
In [46]:
%%timeit -n 10
s = pd.Series(np.random.randint(0,1000,10000))
s+=2
In [47]:
# Assigning to a label that does not exist yet appends a new entry; the new
# label and value do not have to match the existing integer index or data.
s = pd.Series(data=[1, 2, 3])
s.loc['Animal'] = 'Bears'
s
Out[47]:
In [48]:
# Two Series to combine: one with unique labels, one whose labels all repeat.
original_sports = pd.Series({'Archery': 'Bhutan',
                             'Golf': 'Scotland',
                             'Sumo': 'Japan',
                             'Taekwondo': 'South Korea'})
cricket_loving_countries = pd.Series(['Australia',
                                      'Barbados',
                                      'Pakistan',
                                      'England'],
                                     index=['Cricket',
                                            'Cricket',
                                            'Cricket',
                                            'Cricket'])
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
# It returns a new Series and leaves both inputs unchanged.
all_countries = pd.concat([original_sports, cricket_loving_countries])
In [49]:
original_sports #is not changed by append
Out[49]:
In [50]:
cricket_loving_countries
Out[50]:
In [51]:
all_countries
Out[51]:
In [53]:
all_countries.loc['Cricket'] #interesting
Out[53]:
In [54]:
import pandas as pd
# Each purchase is a Series keyed by field name; stacking them as rows gives
# a DataFrame whose row labels are the (non-unique) store names.
purchase_1, purchase_2, purchase_3 = (
    pd.Series({'Name': name, 'Item Purchased': item, 'Cost': cost})
    for name, item, cost in [
        ('Chris', 'Dog Food', 22.50),
        ('Kevyn', 'Kitty Litter', 2.50),
        ('Vinod', 'Bird Seed', 5.00),
    ]
)
df = pd.DataFrame(
    data=[purchase_1, purchase_2, purchase_3],
    index=['Store 1', 'Store 1', 'Store 2'],
)
df.head()
Out[54]:
In [58]:
df.loc['Store 2']['Cost']
Out[58]:
In [59]:
type(df.loc['Store 2'])
Out[59]:
In [60]:
df.loc['Store 1', 'Cost']
Out[60]:
In [61]:
df.T
Out[61]:
In [62]:
df.T.loc['Cost']
Out[62]:
In [63]:
df['Cost']
Out[63]:
In [67]:
df.loc['Store 1']['Cost']
Out[67]:
In [68]:
df.loc[:, ['Name', 'Cost']]
Out[68]:
In [69]:
df.drop('Store 1')
Out[69]:
In [70]:
df
Out[70]:
In [71]:
# Work on a copy so the original frame stays intact, then remove every row
# labelled 'Store 1'.
copy_df = df.copy().drop('Store 1')
copy_df
Out[71]:
In [72]:
copy_df.drop?
In [73]:
del copy_df['Name']
In [74]:
# Assigning a scalar broadcasts it, creating the new column with the same
# value in every row.
df['Location'] = None
# set_index() requires a `keys` argument and raised TypeError when called
# bare; display the frame to show the new column instead.
df
Out[74]:
In [75]:
costs = df['Cost']
In [76]:
costs+=2
In [77]:
df
Out[77]:
In [78]:
!cat olympics.csv
In [83]:
df['Cost'] = df['Cost']*0.8
In [85]:
df['Cost'] *= 0.8
In [86]:
pwd
Out[86]:
In [88]:
df = pd.read_csv('../Week 2/olympics.csv')
df.head()
Out[88]:
In [90]:
df = pd.read_csv('../Week 2/olympics.csv', skiprows=1, index_col=0)
df.head()
Out[90]:
In [91]:
df.columns
Out[91]:
In [92]:
# Translate the raw header prefixes into readable column names.
# '01'/'02'/'03' prefixes become Gold/Silver/Bronze (keeping whatever follows
# the first four characters, e.g. a '.1' suffix), and a leading '№' becomes '#'.
prefix_labels = {'01': 'Gold', '02': 'Silver', '03': 'Bronze'}
renamed = {}
for col in df.columns:
    if col[:2] in prefix_labels:
        renamed[col] = prefix_labels[col[:2]] + col[4:]
    elif col[:1] == '№':
        # The original test compared the two-character slice col[:2] with the
        # single character '№', which can never match a longer column name,
        # so these columns were silently left unrenamed.
        renamed[col] = '#' + col[1:]
# Apply every rename in one call instead of one in-place rename per column.
df = df.rename(columns=renamed)
df.head()
Out[92]:
In [94]:
#df['Gold'] > 0
In [96]:
only_gold = df.where(df['Gold'] > 0)
only_gold.head()
Out[96]:
In [98]:
only_gold = only_gold.dropna(axis=0)
only_gold.head()
Out[98]:
In [100]:
# alternatively more concise
only_gold = df[df['Gold'] > 0]
only_gold.head()
Out[100]:
In [101]:
len(df[(df['Gold'] > 0) | (df['Gold.1'] > 0)])
Out[101]:
In [102]:
df[(df['Gold.1'] > 0) & (df['Gold'] == 0)]
Out[102]:
In [103]:
df.head()
Out[103]:
In [108]:
df['Country'] = df.index
In [109]:
df = df.set_index('Gold')
df.head()
Out[109]:
In [111]:
df = df.reset_index()
In [176]:
df = pd.read_csv('../Week 2/census.csv', encoding='iso-8859-1')
In [177]:
df.head()
Out[177]:
In [178]:
df['SUMLEV'].unique()
Out[178]:
In [179]:
df = df[df['SUMLEV'] == 50]
df.head()
Out[179]:
In [180]:
# Keep only the state/county names plus the yearly birth counts and
# population estimates for 2010-2015.
columns_to_keep = [
    'STNAME', 'CTYNAME',
    'BIRTHS2010', 'BIRTHS2011', 'BIRTHS2012',
    'BIRTHS2013', 'BIRTHS2014', 'BIRTHS2015',
    'POPESTIMATE2010', 'POPESTIMATE2011', 'POPESTIMATE2012',
    'POPESTIMATE2013', 'POPESTIMATE2014', 'POPESTIMATE2015',
]
df = df.loc[:, columns_to_keep]
df.head()
Out[180]:
In [181]:
df = df.set_index(['STNAME', 'CTYNAME'])
df.head()
Out[181]:
In [182]:
df.loc['Michigan', 'Washtenaw County']
Out[182]:
In [183]:
df.loc[[('Michigan', 'Washtenaw County'),
('Michigan', 'Wayne County')]]
Out[183]:
In [169]:
df = pd.read_csv('../Week 2/log.csv')
df
Out[169]:
In [170]:
# Use the 'time' column as the index and order the rows by it.
df = df.set_index('time').sort_index()
df
Out[170]:
In [171]:
# Move 'time' back into a column, then build a two-level index of
# (time, user).
df = df.reset_index().set_index(['time', 'user'])
df
Out[171]:
In [172]:
# Forward-fill: each missing value takes the last valid value above it in the
# same column. fillna(method='ffill') is deprecated; ffill() is the
# dedicated replacement.
df = df.ffill()
df.head()
Out[172]:
In [ ]: